Homeownership is one of the most important ways of acculating wealth. In New York City, proprty values are high and thus homeownership is out of reach for many.Using Home Mortgage Disclosure Act data, I explore the demographics of mortgage applicants and the various outcomes of their applications in order to better understand who receives loans and who does not. Looking at how outcomes change over time and by group, allows us to better understand the way opportunities within the city are changing.
#Loan applications are split pretty evenly between Manhattan, Brooklyn, and Queens, with White applicants representing a large share of the applications. The Unknown category is made up of applicants who did not disclose their race in their loan applications. Disparities in incomes between boroughs are shown in the second graph.
#treemap for counties and race
data_tree <- hmda_ny_5yrs %>% group_by(boro, race_alternative) %>%
summarise(n = n())
ggplot(data_tree, aes(area =n, label = race_alternative, subgroup = boro, fill = race_alternative)) +
geom_treemap() +
geom_treemap_subgroup_border(color = '#F2F5F2') +
geom_treemap_text(place= "bottomright", color = '#F2F5F2',
fontface = "italic") +
geom_treemap_subgroup_text(place = "topleft", color = '#F2F5F2') +
scale_fill_manual(values =
c('#FD7400', '#BF4182', '#580F45', '#0D177F', '#1f8a70')) +
labs(title = "Loan Applicants Organized by Borough and \nRace HMDA Data 2013-2017") +
theme(text = element_text(family = "Meiryo"),
legend.position = "none", plot.title = element_text(size = 18))
data_tree <- hmda_ny_5yrs %>% group_by(boro, income_bins) %>%
summarise(n = n())
ggplot(data_tree, aes(area =n, label = income_bins, subgroup = boro, fill = income_bins)) +
geom_treemap() +
geom_treemap_subgroup_border(color = '#F2F5F2') +
geom_treemap_text(place= "bottomright", color = '#F2F5F2',
fontface = "italic") +
geom_treemap_subgroup_text(place = "topleft", color = '#F2F5F2') +
scale_fill_manual(values =
c('#FD7400', '#BF4182', '#580F45', '#0D177F', '#1f8a70')) +
labs(title = "Loan Applicants Organized by Borough and \nIncome Quartiles HMDA Data 2013-2017") +
theme(text = element_text(family = "Meiryo"),
legend.position = "none", plot.title = element_text(size = 18))
#looking at differences in approval rates by male/female
hmda_ny_5yrs <- hmda_ny_5yrs %>%
mutate(win_income = winsorize(applicant_income_000s, probs = c(.01, .99)))
#filter by people without co-applicants
ggplot(filter(hmda_ny_5yrs, applicant_sex_name %in% c("Female","Male"), co_applicant_ethnicity_name == "No co-applicant"),
aes(y=win_income, x=applicant_sex_name, fill=outcome), alpha = .3) +
geom_boxplot(outlier.size = .5) + scale_y_log10(expand=c(0,0)) +
scale_fill_manual(values = c('#1f8a70', '#E8291A', '#FFE11A')) +
theme(text = element_text(family = "Meiryo"),
panel.background = element_rect(fill = '#F2F5F2'),
panel.grid.major = element_line(color = "#404040"),
plot.subtitle = element_text(face = "italic"),
panel.grid.minor = element_blank(),
plot.title = element_text(size = 18)) +
labs(fill = "Outcome", x = "Gender of Applicant",
y = "Income (in $1,000s, Log Scale)",
title = "On Average, Male Applicants have Higher Incomes \nthan Female Applicants",
subtitle = "Income by Gender, grouped by Loan Outcome", caption = "Home Mortgage Disclosure Act Data, CFPB, 2013-2017")
#approval rates by nta
#number of loans apps by nta
apps_by_nta <- hmda_ny_5yrs %>%
group_by(ntaname) %>%
summarise(total_apps = n())
#approvals by nta
approval_counts_by_nta <- hmda_ny_5yrs %>%
filter(outcome == "Approved") %>%
group_by(ntaname) %>%
summarise(total_approved = n())
approval_rates_by_nta <-
approval_counts_by_nta %>%
left_join(apps_by_nta, by = "ntaname") %>%
mutate(approval_rate = total_approved/total_apps*100)
nta_merged <- left_join(nta_shape, approval_rates_by_nta,
by = c("ntaname")) %>%
st_as_sf()
#dealing with ggplot
nta_merged$breaks <- cut(nta_merged$approval_rate, c(40, 50, 60, 70, 80))
#plot- approval rates by census block ####
ggplot() +
geom_sf(data = nta_merged,
aes(fill = breaks), color = "white", lwd = 0) +
labs(title = "Most Neighborhoods have Loan Approval \nRates between 50% and 60% ", subtitle= "Percent of Loan Applications Approved by Neighborhood Tabulation Area (NTA)",
caption = "Home Mortgage Disclosure Act Data, CFPB, 2013-2017") +
theme(axis.text=element_blank(),
axis.title=element_blank(),
panel.background = element_rect(fill = "#ffffff", color = NA),
axis.line = element_blank(),
plot.subtitle = element_text(face = "italic"),
legend.position = c(0.25,.75),
text = element_text(family = "Meiryo"),
plot.title = element_text(size = 18),
axis.ticks = element_blank()) + guides(fill=guide_legend(title="Percent of \nApplications \nApproved (%)")) + scale_fill_manual(values = c("#fef0d9", "#fdcc8a", "#fc8d59", "#d7301f"))
The highest approval rates are in Bay Terrace/Clearview and Forest Hills, Queens, and Prospect Heights, Brooklyn. While Bay Terrace and Prospect Heights are known as affluent neighborhoods, Forest Hills is known for being diverse and middle-income. The lowest approval rates are found low-income neighborhoods such as Far Rockaway, Queens; Parkchester, Bronx; and Starrett City, Brooklyn.
#approval rates by race by nta facet map with race (MAYBE REPLACE ONE ABOVE)
approval_counts_by_race <- hmda_ny_5yrs %>%
filter(outcome == "Approved") %>%
group_by(ntaname, applicant_race_name_1) %>%
summarise(total_approved = n())
df_for_chloropleth_race_approval <- hmda_ny_5yrs %>%
group_by(ntaname, applicant_race_name_1) %>%
summarise(total_apps = n()) %>%
left_join(approval_counts_by_race,
by = c("ntaname", "applicant_race_name_1")) %>%
mutate(approval_rate = total_approved/total_apps*100)
nta_tract_merged <- left_join(df_for_chloropleth_race_approval,nta_shape,
by = "ntaname") %>%
st_as_sf()
#dealing with ggplot
nta_tract_merged$breaks <- cut(nta_tract_merged$approval_rate, c(0, 20, 40, 60, 80, 100))
#plot- approval rates by census block ####
ggplot(data = nta_tract_merged) +
geom_sf(data = nta_tract_merged, aes(fill = breaks), color = "white", lwd = 0) + facet_wrap(~applicant_race_name_1, ncol = 2) +
labs(title = "Asians, Whites, and Unknown Group have \nHigh Loan Approval Rates across New York City", subtitle = "Percent of Loan Applications Approved by Neighborhood Tabulation Area (NTA) by Race",
caption = "Home Mortgage Disclosure Act Data, CFPB, 2013-2017") +
theme(axis.text=element_blank(),
axis.title=element_blank(),
panel.background = element_rect(fill = "#ffffff", color = NA),
axis.line = element_blank(),
legend.position = "bottom",
plot.subtitle = element_text(face = "italic"),
text = element_text(family = "Meiryo"),
plot.title = element_text(size = 18),
axis.ticks = element_blank()) + guides(fill=guide_legend(title="Percent of Applications Approved (%)")) + scale_fill_manual(values = c("#fef0d9", "#fdcc8a", "#fc8d59", "#e34a33", "#b30000"))
#loan application outcome rates by race and year (facet_grid)
#total loans per quartile, year, race
quartile_race_year <- hmda_ny_5yrs %>% filter(income_bins != "Unknown") %>%
group_by(income_bins, race_alternative, as_of_year, county_name) %>%
summarise(total = n())
quartile_race_year$race_alternative[which(quartile_race_year$race_alternative == "Black or African American")] <- "Black"
quartile_race_year_outcome <- hmda_ny_5yrs %>%
filter(income_bins != "Unknown") %>%
group_by(income_bins, race_alternative, outcome, as_of_year, county_name) %>%
summarise(per_outcome = n())
quartile_race_year_outcome$race_alternative[which(quartile_race_year_outcome$race_alternative == "Black or African American")] <- "Black"
highlights <- data.frame(income_bins =c("Income Quartile 1"), race_alternative =c("Black", "Other"))
quartile_race_year_outcome %>%
left_join(quartile_race_year,
by = c("income_bins", "race_alternative", "as_of_year", "county_name")) %>%
mutate(outcome_rate = per_outcome/total*100) %>%
filter(race_alternative %in% c("Asian", "White", "Black", "Other"),
county_name == "Kings County", !is.na(income_bins)) %>%
ggplot() + geom_line(aes(x = as_of_year, y = outcome_rate, color = outcome)) +
facet_wrap(race_alternative~income_bins) +
scale_color_manual(values = c('#1f8a70', '#E8291A', '#FFE11A')) +
theme(text = element_text(family = "Meiryo"),
plot.subtitle = element_text(face = "italic"),
plot.title = element_text(size = 18),
panel.background = element_rect(fill = '#F2F5F2'),
panel.spacing = unit(1.5, "lines"), legend.position = "bottom", axis.ticks = element_blank()) +
labs(title = "Loan Approval Rates in Brooklyn for Low-to-Middle Income Blacks \nand Other Group has been Decreasing, while most other Races and Quartiles \nhave Steady Rates",
subtitle = "Approval Rates per Year by Income Quartiles and Race",
caption = "Home Mortgage Disclosure Act Data, CFPB, 2013-2017",
color = "Outcome", x = "Year", y = "Percent of Applications (%)") +
geom_rect(data = highlights,
aes(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf),
fill = '#0D177F', alpha= .2)
#approval rates for low income applicants, by NTA faceted by year
apps_outcome_Q1_nta <- hmda_ny_5yrs %>%
filter(income_bins == "Income Quartile 1") %>%
group_by(ntacode, as_of_year, outcome) %>%
summarise(outcome_tot = n())
apps_total_Q1_nta <- hmda_ny_5yrs %>%
filter(income_bins == "Income Quartile 1") %>%
group_by(ntacode, as_of_year) %>%
summarise(tot_apps = n())
approval_rate_5yr <- apps_outcome_Q1_nta %>%
left_join(apps_total_Q1_nta, by = c("ntacode", "as_of_year")) %>%
filter(outcome == "Approved") %>%
mutate(approval_rate = outcome_tot/tot_apps*100)
approval_rate_5yr$breaks <- cut(approval_rate_5yr$approval_rate, c(0, 20, 40, 60, 80, 100))
#need to merge back with census hmda and then merge that with census tracts
approval_rate_5yr %>%
left_join(nta_shape, by = "ntacode") %>%
ggplot() + geom_sf(aes(fill = breaks), lwd = 0) +
facet_wrap(~as_of_year) + scale_fill_manual(values = c('#ffffd4', '#fdd2c0', '#f3a683', '#e27a48', '#cc4c02')) +
guides(fill=guide_legend(title="Approval Rate (%)")) +
theme(axis.text=element_blank(),
axis.title=element_blank(),
panel.background = element_rect(fill = "#ffffff", color = NA),
axis.line = element_blank(),
legend.position = c(.8, .3),
legend.background = element_blank(),
plot.title = element_text(size = 18),
plot.subtitle = element_text(face = "italic"),
text = element_text(family = "Meiryo"),
axis.ticks = element_blank()) + labs(title ="Approval Rates for Low-to-Middle Income Applicants Decline \nin most Boroughs, while Rates on Queens and Staten \nIsland remain High", subtitle = "Approval Rates for Applicants with Incomes below $76,000/year", caption = "Home Mortgage Disclosure Act Data, CFPB, 2013-2017")
#ridges plot of loan to income ratio
breaks = c(seq(0, 15, by=2.5))
ggplot(hmda_ny_5yrs %>% filter(co_applicant_ethnicity_name== "No co-applicant"), aes(x = win_loan_income_ratio, y = race_alternative,
fill = race_alternative)) +
geom_density_ridges() + geom_vline(xintercept = 2.5) + scale_x_continuous(limits = c(0, 15), breaks = breaks) +
theme(plot.title = element_text(size = 18),
plot.subtitle = element_text(face = "italic"),
text = element_text(family = "Meiryo"),
panel.grid.minor = element_blank()) +
scale_fill_manual(values =
c('#FD7400', '#BF4182', '#580F45', '#0D177F', '#1f8a70')) + guides(fill = FALSE) + labs(title = "Historically Disadvantaged Groups (Black, Other) \nhave a Higher Concentration of \nLoan-to-Income Ratios over 2.5", subtitle = "Investopedia states that a loan amount below 2.5 times \nan applicant's annual income is affordable to the applicant", x = "Loan-to-Income Ratio", y = "Race", caption = "Home Mortgage Disclosure Act Data, CFPB, 2013-2017")
num_denied_by_race <- hmda_ny_5yrs %>%
filter(outcome == "Denied", !is.na(denial_reason_alt)) %>%
group_by(race_alternative) %>%
summarise(tot_apps_denied = n())
denial_reasons_race <- hmda_ny_5yrs %>%
filter(outcome == "Denied", !is.na(denial_reason_alt)) %>%
group_by(race_alternative, denial_reason_alt) %>%
summarise(denied_by_reason = n())
#denial reasons rates race ####
denial_reasons_race %>%
left_join(num_denied_by_race, by = "race_alternative") %>%
mutate(denial_rate = denied_by_reason/tot_apps_denied) %>%
ggplot() +
geom_bar(aes(x = race_alternative,
y = denial_rate,
fill = denial_reason_alt),
stat = "identity") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
text = element_text(family = "Meiryo"),
panel.background = element_rect(fill = '#F2F5F2'),
axis.ticks = element_blank(),
axis.line = element_blank(),
plot.title = element_text(size = 16),
panel.grid = element_blank()) +
scale_x_discrete(expand = c(0,0)) + scale_y_continuous(expand = c(0,0)) +
labs(title = "Black Applicants and \nOther (Native American and Pacific Islanders) \nApplicants Denials are more frequently \ndue to Credit History, while this is an Infrequent Denial \nReason for Whites and Asians",
subtitle = "Distribution of Denial Reasons by Race",
caption = "Home Mortgage Disclosure Act Data, CFPB, 2013-2017",
x = "Applicant Race", y = "Denial Rate", fill = "Denial Reason") + scale_fill_discrete(name = "Denial Reason") + scale_fill_manual(values = c('#FD7400', '#BF4182', '#580F45', '#0D177F', '#1f8a70'))